/*****************************************************************************\
 * src/slurmd/slurmstepd/ulimits.c - set user limits for job
 *****************************************************************************
 *  Copyright (C) 2002-2007 The Regents of the University of California.
 *  Copyright (C) 2008-2010 Lawrence Livermore National Security.
 *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
 *  Written by Mark Grondona <mgrondona@llnl.gov>.
 *  CODE-OCEC-09-009. All rights reserved.
 *
 *  This file is part of Slurm, a resource management program.
 *  For details, see <https://slurm.schedmd.com/>.
 *  Please also read the included file: DISCLAIMER.
 *
 *  Slurm is free software; you can redistribute it and/or modify it under
 *  the terms of the GNU General Public License as published by the Free
 *  Software Foundation; either version 2 of the License, or (at your option)
 *  any later version.
 *
 *  In addition, as a special exception, the copyright holders give permission
 *  to link the code of portions of this program with the OpenSSL library under
 *  certain conditions as described in each individual source file, and
 *  distribute linked combinations including the two. You must obey the GNU
 *  General Public License in all respects for all of the code used other than
 *  OpenSSL. If you modify file(s) with this exception, you may extend this
 *  exception to your version of the file(s), but you are not obligated to do
 *  so. If you do not wish to do so, delete this exception statement from your
 *  version.  If you delete this exception statement from all source files in
 *  the program, then also delete it here.
 *
 *  Slurm is distributed in the hope that it will be useful, but WITHOUT ANY
 *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
 *  details.
 *
 *  You should have received a copy of the GNU General Public License along
 *  with Slurm; if not, write to the Free Software Foundation, Inc.,
 *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA.
\*****************************************************************************/

#define _GNU_SOURCE /* Required for prlimit */

#include "config.h"

#include <stdlib.h>
#include <string.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

#include "src/common/env.h" /* For unsetenvp() */
#include "src/common/log.h"
#include "src/common/macros.h"
#include "src/common/read_config.h"
#include "src/common/slurm_rlimits_info.h"
#include "src/common/strlcpy.h"
#include "src/common/xmalloc.h"
#include "src/common/xstring.h"

#include "src/slurmd/slurmstepd/slurmstepd_job.h"

/*
 * Prototypes:
 *
 */
static int _get_env_val(char **env, const char *name, unsigned long *valp,
		bool *u_req_propagate);
static int _set_limit(char **env, slurm_rlimits_info_t *rli);

/*
 * prlimit() only exists on Linux, so on Linux simply call it.  For non-Linux
 * systems, define a function that wraps get/setrlimit() and don't expect a pid.
 * The pid is currently only used when using pam_slurm_adopt, which is only
 * supported on Linux.
 */
#ifdef __linux__
#define _prlimit(pid, resource, new_limit, old_limit) \
	prlimit(pid, resource, new_limit, old_limit)
#else
static int _prlimit(pid_t pid, int resource, const struct rlimit *new_limit,
		     struct rlimit *old_limit)
{
	xassert(pid == 0);
	xassert(!(new_limit && old_limit));

	if (new_limit)
		return setrlimit(resource, new_limit);

	xassert(old_limit);
	return getrlimit(resource, old_limit);
}
#endif

/*
 * Set user resource limits using the values of the environment variables
 * of the name "SLURM_RLIMIT_*" that are found in step->env.
 *
 * The sys admin can control the propagation of user limits in the slurm
 * conf file by setting values for the PropagateResourceRlimits and
 * ResourceLimits keywords.
 *
 * NOTE: THIS FUNCTION SHOULD ONLY BE CALLED RIGHT BEFORE THE EXEC OF
 * A SCRIPT AFTER THE FORK SO AS TO LIMIT THE ABOUT OF EFFECT THE
 * LIMITS HAVE WHEN COMBINED WITH THE SLURMSTEPD.  RLIMIT_FSIZE IS THE
 * MAIN REASON SINCE IF THE USER SETS THIS TO BE LOWER THAN THE SIZE
 * OF THE CURRENT SLURMD.LOG THE STEPD WILL CORE THE NEXT TIME
 * ANYTHING IS WRITTEN TO IT.  SO IF RUNNING +DEBUG2 AND THE USER IS
 * GETTING CORES WITH FILE SYSTEM LIMIT ERRORS THIS IS THE REASON.
 *
 * NOTE: The slurmstepd will not normally write a core file due to setuid().
 * Run as normal user to disable setuid() and permit a core file to be written.
 */

extern void set_user_limits(stepd_step_rec_t *step, pid_t pid)
{
#ifdef RLIMIT_AS
#define SLURM_RLIMIT_VSIZE RLIMIT_AS
#define SLURM_RLIMIT_VNAME "RLIMIT_AS"
#elif defined(RLIMIT_DATA)
/* RLIMIT_DATA is useless on many systems which provide anonymous
 * mmap()'s in addition to brk(), use it here only as a fallback for
 * oddball systems lacking RLIMIT_AS. */
#define SLURM_RLIMIT_VSIZE RLIMIT_DATA
#define SLURM_RLIMIT_VNAME "RLIMIT_DATA"
#endif
	slurm_rlimits_info_t *rli;
	struct rlimit r;
	rlim_t task_mem_bytes;
	int rlimit_rc;

	if (_prlimit(pid, RLIMIT_CPU, NULL, &r) == 0) {
		if (r.rlim_max != RLIM_INFINITY) {
			error("Slurm process CPU time limit is %d seconds",
			      (int) r.rlim_max);
		}
	}

	for (rli = get_slurm_rlimits_info(); rli->name; rli++)
		_set_limit(step->env, rli);

	/* Set soft and hard rss and vsize limit for this process,
	 * handle job limit (for all spawned processes) in slurmd */
	task_mem_bytes  = step->step_mem;	/* MB */
	task_mem_bytes *= (1024 * 1024);

	/* Many systems, Linux included, ignore RSS limits, but set it
	 * here anyway for consistency and to provide a way for
	 * applications to interrogate what the RSS limit is (with the
	 * caveat that the real RSS limit is over all job tasks on the
	 * node and not per process, but hopefully this is better than
	 * nothing).  */
#ifdef RLIMIT_RSS
	rlimit_rc = _prlimit(pid, RLIMIT_RSS, NULL, &r);
	if ((task_mem_bytes) && !rlimit_rc && (r.rlim_max > task_mem_bytes)) {
		r.rlim_max =  r.rlim_cur = task_mem_bytes;
		if (_prlimit(pid, RLIMIT_RSS, &r, NULL)) {
			/* Indicates that limit has already been exceeded */
			fatal("_prlimit(RLIMIT_RSS, %"PRIu64" MB): %m",
			      step->step_mem);
		} else
			debug2("Set task rss(%"PRIu64" MB)", step->step_mem);
		if (get_log_level() >= LOG_LEVEL_DEBUG2) {
			_prlimit(pid, RLIMIT_RSS, NULL, &r);
			debug2("Task RSS limits from _prlimit: rlim_cur:%lu rlim_max:%lu",
			       r.rlim_cur, r.rlim_max);
		}
	} else if (rlimit_rc) {
		error("_prlimit(RLIMIT_RSS,..) failed with %m");
	} else {
		debug2("Not setting task rss rlimit, task bytes: %lu, rlimit_max: %lu",
		       task_mem_bytes, r.rlim_max);
	}
#endif

#ifdef SLURM_RLIMIT_VSIZE
	rlimit_rc = _prlimit(pid, SLURM_RLIMIT_VSIZE, NULL, &r);
	if ((task_mem_bytes) && slurm_conf.vsize_factor && !rlimit_rc &&
	    (r.rlim_max > task_mem_bytes)) {
		r.rlim_max = task_mem_bytes * (slurm_conf.vsize_factor / 100.0);
		r.rlim_cur = r.rlim_max;
		if (_prlimit(pid, SLURM_RLIMIT_VSIZE, &r, NULL)) {
			/* Indicates that limit has already been exceeded */
			fatal("_prlimit(%s, %"PRIu64" MB): %m",
			      SLURM_RLIMIT_VNAME, step->step_mem);
		} else
			debug2("Set task vsize(%"PRIu64" MB)", step->step_mem);
		if (get_log_level() >= LOG_LEVEL_DEBUG2) {
			_prlimit(pid, SLURM_RLIMIT_VSIZE, NULL, &r);
			debug2("task VSIZE limits: rlim_cur:%lu rlim_max:%lu",
			       r.rlim_cur, r.rlim_max);
		}
	} else if (rlimit_rc) {
		error("_prlimit(SLURM_RLIMIT_VSIZE,,..) failed with %m");
	} else {
		debug2("Not setting task vsize rlimit, task bytes: %lu, rlimit_max: %lu",
		       task_mem_bytes, r.rlim_max);
	}
#endif
}

/*
 *  Return an rlimit as a string suitable for printing.
 */
static char * rlim_to_string (unsigned long rlim, char *buf, size_t n)
{
	if (rlim == (unsigned long) RLIM_INFINITY)
		strlcpy (buf, "inf", n);
	else
		snprintf (buf, n, "%lu", rlim);
	return (buf);
}

/* Set umask using value of env var SLURM_UMASK */
extern int
set_umask(stepd_step_rec_t *step)
{
	mode_t mask;
	char *val;

	if (!(val = getenvp(step->env, "SLURM_UMASK"))) {
		if (step->step_id.step_id != SLURM_EXTERN_CONT)
			debug("Couldn't find SLURM_UMASK in environment");
		return SLURM_ERROR;
	}

	mask = strtol(val, (char **)NULL, 8);
	if ((step->step_id.step_id == SLURM_EXTERN_CONT) ||
	    (step->step_id.step_id == SLURM_INTERACTIVE_STEP) ||
	    (step->step_id.step_id == SLURM_BATCH_SCRIPT))
		unsetenvp(step->env, "SLURM_UMASK");
	umask(mask);
	return SLURM_SUCCESS;
}

/*
 * Set rlimit using value of env vars such as SLURM_RLIMIT_FSIZE if
 * the slurm config file has PropagateResourceLimits set or the user
 * requested it with srun/sbatch --propagate.
 *
 * NOTE: THIS FUNCTION SHOULD ONLY BE CALLED RIGHT BEFORE THE EXEC OF
 * A SCRIPT AFTER THE FORK SO AS TO LIMIT THE ABOUT OF EFFECT THE
 * LIMITS HAVE WHEN COMBINED WITH THE SLURMSTEPD.  RLIMIT_FSIZE IS THE
 * MAIN REASON SINCE IF THE USER SETS THIS TO BE LOWER THAN THE SIZE
 * OF THE CURRENT SLURMD.LOG THE STEPD WILL CORE THE NEXT TIME
 * ANYTHING IS WRITTEN TO IT.  SO IF RUNNING +DEBUG2 AND THE USER IS
 * GETTING CORES WITH FILE SYSTEM LIMIT ERRORS THIS IS THE REASON.
 */
static int
_set_limit(char **env, slurm_rlimits_info_t *rli)
{
	unsigned long env_value;
	char max[24], cur[24], req[24];
	struct rlimit r;
	bool u_req_propagate;  /* e.g. true if 'srun --propagate' */
	char *env_name = NULL, *rlimit_name;
	int rc = SLURM_SUCCESS;

	xstrfmtcat(env_name, "SLURM_RLIMIT_%s", rli->name);
	rlimit_name = xstrdup(env_name + 6);
	if (_get_env_val(env, env_name, &env_value, &u_req_propagate)) {
		debug("Couldn't find %s in environment", env_name);
		xfree(env_name);
		return SLURM_ERROR;
	}

	/*
	 * Users shouldn't get the SLURM_RLIMIT_* env vars in their environ
	 */
	unsetenvp(env, env_name);
	xfree(env_name);

	/*
	 * We'll only attempt to set the propagated soft rlimit when indicated
	 * by the slurm conf file settings, or the user requested it.
	 */
	if ( ! (rli->propagate_flag == PROPAGATE_RLIMITS || u_req_propagate))
		goto cleanup;

	if (getrlimit( rli->resource, &r ) < 0) {
		error("getrlimit(%s): %m", rlimit_name);
		rc = SLURM_ERROR;
		goto cleanup;
	}

	/*
	 * Nothing to do if the rlimit won't change
	 */
	if (r.rlim_cur == (rlim_t) env_value) {
		debug2( "_set_limit: %s setrlimit %s no change in value: %lu",
			u_req_propagate?"user":"conf", rlimit_name,
			(unsigned long) r.rlim_cur);
		goto cleanup;
	}

	debug2("_set_limit: %-14s: max:%s cur:%s req:%s", rlimit_name,
		rlim_to_string (r.rlim_max, max, sizeof (max)),
		rlim_to_string (r.rlim_cur, cur, sizeof (cur)),
		rlim_to_string (env_value,  req, sizeof (req)) );

	r.rlim_cur = (rlim_t) env_value;
	if ((!u_req_propagate) && (r.rlim_cur > r.rlim_max)) {
		verbose("%s: %-14s: reducing req:%s to max:%s",
			__func__,
			rlimit_name,
			rlim_to_string(env_value, req, sizeof(req)),
			rlim_to_string(r.rlim_max, max, sizeof(max)));

		r.rlim_cur = r.rlim_max;
	}

	if (setrlimit( rli->resource, &r ) < 0) {
		/*
		 * Report an error only if the user requested propagate
		 */
		if (u_req_propagate) {
			error( "Can't propagate %s of %s from submit host: %m",
				rlimit_name,
				r.rlim_cur == RLIM_INFINITY ? "'unlimited'" :
				rlim_to_string( r.rlim_cur, cur, sizeof(cur)));
		} else {
			verbose("Can't propagate %s of %s from submit host: %m",
				rlimit_name,
				r.rlim_cur == RLIM_INFINITY ? "'unlimited'" :
				rlim_to_string( r.rlim_cur, cur, sizeof(cur)));
		}
		rc = SLURM_ERROR;
		goto cleanup;
	}
	debug2( "_set_limit: %s setrlimit %s succeeded",
			u_req_propagate?"user":"conf", rlimit_name );

cleanup:
	xfree(rlimit_name);
	return rc;
}

/*
 * Determine the value of the env var 'name' (if it exists) and whether
 * or not the user wants to use its value as the jobs soft rlimit.
 */
static int _get_env_val(char **env, const char *name, unsigned long *valp,
		bool *u_req_propagate)
{
	char *val    = NULL;
	char *p      = NULL;

	xassert(env  != NULL);
	xassert(name != NULL);

	if (!(val = getenvp(env, name)))
		return (-1);

	/*
	 * The letter 'U' would have been prepended to the string value if the
	 * user requested to have this rlimit propagated via 'srun --propagate'
	 */
	if (*val == 'U') {
		*u_req_propagate = true;
		debug2( "_get_env_val: %s propagated by user option", &name[6]);
		val++;
	}
	else
		*u_req_propagate = false;

	*valp = strtoul(val, &p, 10);

	if (p && (*p != '\0'))  {
		error("Invalid %s env var, value = `%s'", name, val);
		return (-1);
	}

	return (0);
}
